#This to obtain:
#table_2.tex

library("broom")
library("dplyr")
library("grid")
library("tidyr")
library("sandwich")
library("lfe")
library("broom")
library("dplyr")
library("tidyr")
library("grid")
library("stargazer")
library("lmtest")
library("multiwayvcov")
library("ggplot2")
library("glue")
library("spdep")
library("stringr")
library("lemon")
library("readxl")
library("spatialreg")

#setwd("C:/Users/bogdanp/")
setwd("/Users/bgpopescu/")

#Reading Polygons
x<-st_layers(dsn="./Dropbox/Legacies_Project/Analysis/data/data.gdb")


HRV_adm0 <- st_read(dsn="./Dropbox/Legacies_Project/Analysis/data/data.gdb",
                    layer="HRV_adm0_wgs")

HRV_adm3 <- st_read(dsn="./Dropbox/Legacies_Project/Analysis/data/data.gdb",
                    layer="HRV_adm3")
HRV_adm3<-st_simplify(HRV_adm3, dTolerance = 0.005)
HRV_adm3<-subset(HRV_adm3, select = c(ID_2))
HRV_adm3_data = read_excel("./Dropbox/Legacies_Project/Analysis/data/merge.xlsx", sheet=1, col_names = TRUE, skip = 0)


final_sp<-left_join(HRV_adm3, HRV_adm3_data, by = c("ID_2"="ID_2"))
final_sp<-subset(final_sp, bosnia_border_mun==0)

#################################
#Function for spatial regression#
#################################
spatial_regression <- function(data_sample, y, x, unique_observation_id) {
  clean_x = clean_list(x, "quad")
  clean_x = clean_list(clean_x, "bfe")
  
  specification = as.formula(paste(glue("{y}~"), 
                                   paste(clean_x, collapse = "+")))
  bw<-subset(data_sample, select =c(x, y, unique_observation_id))
  #Removing NAs
  bw<-bw[!sf::st_is_empty(bw), ]
  bw<-na.omit(bw)
  #Step1: the function "poly2nb" builds a neighbours list based on
  list_nb=poly2nb(bw, row.names = dplyr::pull(bw, unique_observation_id))
  #Step2: Convert nb to listw type
  list_nb_w=tryCatch(nb2listw(list_nb, zero.policy=TRUE), error=function(err) NA)
  #Step3: Spatial Filtering
  reg1_res_sp<-tryCatch(SpatialFiltering(specification,
                                         data = bw,
                                         nb = list_nb,
                                         style="W",
                                         ExactEV = F,
                                         zero.policy = T,
                                         na.action=na.exclude),
                        error=function(err) NA)
  
  print("Done Spatial Filtering")
  #Step4: Include fitted results"
  bw_fitted<-tryCatch(cbind(bw, as.data.frame(fitted(reg1_res_sp)),
                            id = row.names(bw)), error=function(err) 
                            {print("NO NEIGHBORS"); data_sample})
  #Keeping track of indices
  #This will allow us to see exactly which observations were used in the regressions
  row.names(bw_fitted) <- bw_fitted$id
  bw_fitted<-subset(bw_fitted, select = -c(id))
  list_fitted_ei<-names(bw_fitted[, grepl("vec", names(bw_fitted))])
  list_fitted_ei<-list_fitted_ei[list_fitted_ei != "Shape"]
  all_vars<-append(x, list_fitted_ei)
  specification_with_eigenv = as.formula(paste(glue("{y}~"), 
                                               paste(all_vars, collapse = "+")))
  
  print(specification_with_eigenv)
  model1 <- tryCatch(lm(specification_with_eigenv, data = bw_fitted, na.action = na.exclude), error=function(err) NA)
  
  reg_moran<-moran.test(x=resid(model1),listw=list_nb_w, zero.policy = T)$statistic
  reg_moran_p<-moran.test(x=resid(model1),listw=list_nb_w, zero.policy = T)$p.value
  reg_moran_star<-paste(round(reg_moran, digits = 3), "",
                        ifelse(reg_moran_p<.01,"***",
                               ifelse(reg_moran_p<.05,"**",
                                      ifelse(reg_moran_p<.1,"*",
                                             ""))), "", "", sep = "")
  
  
  return(list(model1, reg_moran_star))
}


######################################
#Function for cleaning the list of FE#
#####################################

clean_list <- function(old_list, particle) {
  term <- paste(glue("^{particle}"))
  new_list = list() 
  for (i in old_list) {
    #print(i)
    temp_list = c()
    
    for (j in i){
      if (!grepl(term, j)){
        temp_list <- c(temp_list, j)
      }
    }
    l = length(new_list)
    new_list[[l+1]] <- temp_list
  }
  return(new_list)}

#clean_list(specifications, "SN_")


###############################
#Function for rerunning models#
###############################
rerun_regression<-function(regression_name, string_dv, orig_data_for_clusters, cluster_name){
  #Process:
  #-takes already existing regression objects, associates them with an 
  #"official" label, calculates robust clustered SE and CI based on a cluster
  #that is in the original dataset
  #and puts the results in a tibble
  #-this will be used for ggplot later
  
  #Input:
  #-Regression model object after lm
  #-The label for the DV that you want displayed in ggplot
  #-original dataset that contains thhe names of the clusters
  
  #Output:
  #-A tibble with regression coefficients from the model that you ran with:
  #--robust SE
  #--robust CI
  
  #Recreate dataframe from the regression object
  remake_data<-regression_name$model
  #Scale the DV
  remake_data[1]<-scale(remake_data[1])
  #Take the string of the DV and IV
  dv<-regression_name$terms[[2]]
  ivs<-regression_name$terms[[3]]
  ivs_str<-paste(ivs)[2]
  dv_str<-paste(dv, "~", collapse = "")
  dv_ivs_str<-paste(dv_str, ivs_str, sep=" ")
  #Create specification and run model
  specification = as.formula(dv_ivs_str)
  model <- lm(specification, data = remake_data)
  no_obs<-nobs(model)
  no_obs<-paste("Obs: ", as.character(no_obs), sep = "")
  #Calculate robust SE
  model_se<-se_robust_cluster(model, orig_data_for_clusters, cluster_name)
  #Create tibble
  model_tidy<-tidy(model)
  model_tidy$no_obs<-no_obs
  model_se_tidy<-tidy(model_se)
  names(model_se_tidy)[1] <- "term"
  names(model_se_tidy)[2] <- "robust.se"
  model_tidy2<-left_join(model_tidy, model_se_tidy, by = c("term"="term"))
  
  #Calculate CI
  df_ci<-as.data.frame(ci_robust_cluster(model, orig_data_for_clusters, cluster_name))
  df_ci <- cbind(term = rownames(df_ci), df_ci)
  names(df_ci)[2] <- "lb_95"
  names(df_ci)[3] <- "ub_95"
  
  #Merge CI to the dataframe
  model_tidy3<-left_join(model_tidy2, df_ci, by = c("term"="term"))
  #Only keep the treat variable
  model_tidy4<-subset(model_tidy3, term=="treat")
  model_tidy4$dv_official<-string_dv
  return(model_tidy4)
}

se_robust_cluster <- function(x, data_sample_shape, name_cluster){
  data_model<-x$model
  data_model$in_regression<-1
  data_model<-subset(data_model, select=c(in_regression))
  #Create index
  data_model <- tibble::rowid_to_column(data_model, "index")
  data2<-tibble::rowid_to_column(data_sample_shape, "index")
  #Merge by rowname or index
  data_merged <- left_join(data_model, data2, by=c("index" = "index"))
  #Remove any missing
  data_subset<-subset(data_merged, !is.na(in_regression))
  dat_clusters<-data_subset[[name_cluster]]
  res<-coeftest(x, vcov = cluster.vcov(x, dat_clusters, force_posdef=T))[, 2]
  return(res)}

ci_robust_cluster <- function(x, data_sample_shape, name_cluster){
  data_model<-x$model
  data_model$in_regression<-1
  data_model<-subset(data_model, select=c(in_regression))
  #Create index
  data_model <- tibble::rowid_to_column(data_model, "index")
  data2<-tibble::rowid_to_column(data_sample_shape, "index")
  #Merge by rowname or index
  data_merged <- left_join(data_model, data2, by=c("index" = "index"))
  #Remove any missing
  data_subset<-subset(data_merged, !is.na(in_regression))
  dat_clusters<-data_subset[[name_cluster]]
  res_ci<-coefci(x, vcov = cluster.vcov(x, dat_clusters, force_posdef=T))
  return(res_ci)}


#Analyzing data
data<-final_sp


specifications <- list(c('treat', 
                         'POINT_X', 'POINT_Y', 
                         'zagreb_distance', 
                         'bfe1', 'bfe2', 'bfe3', 'bfe4', 'bfe5', 'bfe6', 'bfe7',
                         'quad1', 'quad2', 'quad3'))


###########################
#rat_medical_personel_1857#
###########################
rat_medical_personel_1857<-spatial_regression(final_sp, "rat_medical_personel_1857", specifications[[1]], "ID_2")[[1]]
rat_medical_personel_1857_m<-spatial_regression(final_sp, "rat_medical_personel_1857", specifications[[1]], "ID_2")[[2]]

########################
#railroads_1869_density#
########################
railroads_1869_density<-spatial_regression(final_sp, "railroads_1869_density", specifications[[1]], "ID_2")[[1]]
railroads_1869_density_m<-spatial_regression(final_sp, "railroads_1869_density", specifications[[1]], "ID_2")[[2]]

###########################
#thoroughfare_1940_density#
###########################
thoroughfare_1940_density<-spatial_regression(final_sp, "thoroughfare_1940_density", specifications[[1]], "ID_2")[[1]]
thoroughfare_1940_density_m<-spatial_regression(final_sp, "thoroughfare_1940_density", specifications[[1]], "ID_2")[[2]]

##################################
#data$roads_1957_concrete_density#
##################################
roads_1957_concrete_density<-spatial_regression(final_sp, "roads_1957_concrete_density", specifications[[1]], "ID_2")[[1]]
roads_1957_concrete_density_m<-spatial_regression(final_sp, "roads_1957_concrete_density", specifications[[1]], "ID_2")[[2]]

###################
#pct_no_water 2011#
###################
pct_no_water_latlon<-spatial_regression(final_sp, "pct_no_water", specifications[[1]], "ID_2")[[1]]
pct_no_water_latlon_m<-spatial_regression(final_sp, "pct_no_water", specifications[[1]], "ID_2")[[2]]

#################
#pov_rate_income#
#################
pov_rate_income_latlon<-spatial_regression(final_sp, "pov_rate_income", specifications[[1]], "ID_2")[[1]]
pov_rate_income_latlon_m<-spatial_regression(final_sp, "pov_rate_income", specifications[[1]], "ID_2")[[2]]

#################
#pov_rate_income#
#################
share_lesseduc_25_latlon<-spatial_regression(final_sp, "share_lesseduc_25", specifications[[1]], "ID_2")[[1]]
share_lesseduc_25_latlon_m<-spatial_regression(final_sp, "share_lesseduc_25", specifications[[1]], "ID_2")[[2]]
mean_medical_personel_1857<-round(mean(final_sp$rat_medical_personel_1857, na.rm=T), digits = 3)
sd_medical_personel_1857<-round(sd(final_sp$rat_medical_personel_1857, na.rm=T), digits = 3)
mean_railroads_1869_density<-round(mean(final_sp$railroads_1869_density, na.rm=T), digits = 3)
sd_railroads_1869_density<-round(sd(final_sp$railroads_1869_density, na.rm=T), digits = 3)
mean_thoroughfare_1940_density<-round(mean(final_sp$thoroughfare_1940_density, na.rm=T), digits = 3)
sd_thoroughfare_1940_density<-round(sd(final_sp$thoroughfare_1940_density, na.rm=T), digits = 3)
mean_roads_1957_concrete_density<-round(mean(final_sp$roads_1957_concrete_density, na.rm=T), digits = 3)
sd_roads_1957_concrete_density<-round(sd(final_sp$roads_1957_concrete_density, na.rm=T), digits = 3)
mean_pct_no_water<-round(mean(final_sp$pct_no_water, na.rm=T), digits = 3)
sd_pct_no_water<-round(sd(final_sp$pct_no_water, na.rm=T), digits = 3)
mean_pov_rate_income<-round(mean(final_sp$pov_rate_income, na.rm=T), digits = 3)
sd_pov_rate_income<-round(sd(final_sp$pov_rate_income, na.rm=T), digits = 3)
mean_share_lesseduc_25<-round(mean(final_sp$share_lesseduc_25, na.rm=T), digits = 3)
sd_share_lesseduc_25<-round(sd(final_sp$share_lesseduc_25, na.rm=T), digits = 3)


column_labels= c("\\shortstack{Prop.\\\\ Doctors\\\\ 1857}",
                 "\\shortstack{Railroad\\\\ Density\\\\ 1869}",
                 "\\shortstack{Thoroughfare\\\\ Density\\\\ 1940}",
                 "\\shortstack{Asphalt\\\\ Roads\\\\ 1957}",
                 "\\shortstack{Pct. Dwellings\\\\ No Water\\\\ 2011}",
                 "\\shortstack{Pct. People\\\\ Risk of Poverty\\\\ 2011}",
                 "\\shortstack{Pct. Less\\\\ Educated People\\\\ 2011}")

list_models<-list(rat_medical_personel_1857,
                  railroads_1869_density,
                  thoroughfare_1940_density,
                  roads_1957_concrete_density,
                  pct_no_water_latlon,
                  pov_rate_income_latlon,
                  share_lesseduc_25_latlon)

multiple_res =stargazer(list_models,
                        column.labels= column_labels,
                        keep = c("treat"),
                        se = list(se_robust_cluster(rat_medical_personel_1857, data, "NAME_2"),
                                  se_robust_cluster(railroads_1869_density, data, "NAME_2"),
                                  se_robust_cluster(thoroughfare_1940_density, data, "NAME_2"),
                                  se_robust_cluster(roads_1957_concrete_density, data, "NAME_2"),
                                  se_robust_cluster(pct_no_water_latlon, data, "NAME_2"),
                                  se_robust_cluster(pov_rate_income_latlon, data, "NAME_2"),
                                  se_robust_cluster(share_lesseduc_25_latlon, data, "NAME_2")),
                        covariate.labels=c("Military Territory"),
                        dep.var.labels.include = F,
                        omit.stat=c("LL","ser","f", "rsq"), no.space=TRUE, float=FALSE,
                        add.lines=list(c("Mean", mean_medical_personel_1857,
                                         mean_railroads_1869_density,
                                         mean_thoroughfare_1940_density,
                                         mean_roads_1957_concrete_density,
                                         mean_pct_no_water,
                                         mean_pov_rate_income,
                                         mean_share_lesseduc_25),
                                       c("SD", sd_medical_personel_1857,
                                         sd_railroads_1869_density,
                                         sd_thoroughfare_1940_density,
                                         sd_roads_1957_concrete_density,
                                         sd_pct_no_water,
                                         sd_pov_rate_income,
                                         sd_share_lesseduc_25),
                                       c("Boundary FE", rep("Yes", 10)),
                                       c("Region FE", rep("Yes", 10)),
                                       c("Moran eigenvectors", rep("Yes", 10)),
                                       c("Moran's I Residual", rat_medical_personel_1857_m,
                                         railroads_1869_density_m,
                                         thoroughfare_1940_density_m,
                                         roads_1957_concrete_density_m,
                                         pct_no_water_latlon_m,
                                         pov_rate_income_latlon_m,
                                         share_lesseduc_25_latlon_m)))
note_latex <- "\\multicolumn{8}{l} {\\parbox[t]{20cm}{ \\footnotesize \\textit{Notes:} 
Coefficients and county-clustered robust standard errors in parentheses from OLS regression. 
$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01. 
See codebook in the appendix for data sources and how the variables were calculated.}}"
multiple_res [grepl("Note", multiple_res)] <- note_latex
#cat(multiple_res, file="multiple_res.tex", sep="\n")
cat(multiple_res, file="./Dropbox/Legacies_Project/Paper/tables/table_2.tex", sep="\n")

